-
Notifications
You must be signed in to change notification settings - Fork 14.7k
Scalarize vector mad
operations for integer types
#152228
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-directx Author: Kaitlin Peng (kmpeng) ChangesFixes #152220.
Full diff: https://github.com/llvm/llvm-project/pull/152228.diff 3 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index ffd900c68893f..5153d24070dc9 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -56,6 +56,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_wave_reduce_sum:
case Intrinsic::dx_wave_reduce_umax:
case Intrinsic::dx_wave_reduce_usum:
+ case Intrinsic::dx_imad:
+ case Intrinsic::dx_umad:
return true;
default:
return false;
diff --git a/llvm/test/CodeGen/DirectX/imad.ll b/llvm/test/CodeGen/DirectX/imad.ll
index 5d9463d658cf5..2e612f0182ddc 100644
--- a/llvm/test/CodeGen/DirectX/imad.ll
+++ b/llvm/test/CodeGen/DirectX/imad.ll
@@ -1,17 +1,13 @@
-; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower < %s | FileCheck %s
; Make sure dxil operation function calls for round are generated for float and half.
-; CHECK:call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
-; CHECK:call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
-; CHECK:call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
-
-; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-pc-shadermodel6.7-library"
; Function Attrs: noinline nounwind optnone
define noundef i16 @imad_short(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 {
entry:
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
%p2.addr = alloca i16, align 2
%p1.addr = alloca i16, align 2
%p0.addr = alloca i16, align 2
@@ -31,6 +27,7 @@ declare i16 @llvm.dx.imad.i16(i16, i16, i16) #1
; Function Attrs: noinline nounwind optnone
define noundef i32 @imad_int(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) #0 {
entry:
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
%p2.addr = alloca i32, align 4
%p1.addr = alloca i32, align 4
%p0.addr = alloca i32, align 4
@@ -50,6 +47,7 @@ declare i32 @llvm.dx.imad.i32(i32, i32, i32) #1
; Function Attrs: noinline nounwind optnone
define noundef i64 @imad_int64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 {
entry:
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
%p2.addr = alloca i64, align 8
%p1.addr = alloca i64, align 8
%p0.addr = alloca i64, align 8
@@ -65,3 +63,95 @@ entry:
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare i64 @llvm.dx.imad.i64(i64, i64, i64) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i16> @imad_int16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i16> %p0, i64 0
+ ; CHECK: extractelement <4 x i16> %p1, i64 0
+ ; CHECK: extractelement <4 x i16> %p2, i64 0
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 1
+ ; CHECK: extractelement <4 x i16> %p1, i64 1
+ ; CHECK: extractelement <4 x i16> %p2, i64 1
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 2
+ ; CHECK: extractelement <4 x i16> %p1, i64 2
+ ; CHECK: extractelement <4 x i16> %p2, i64 2
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 3
+ ; CHECK: extractelement <4 x i16> %p1, i64 3
+ ; CHECK: extractelement <4 x i16> %p2, i64 3
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i16> poison, i16 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 3
+ %dx.imad = call <4 x i16> @llvm.dx.imad.v4i16(<4 x i16> %p0, <4 x i16> %p1, <4 x i16> %p2)
+ ret <4 x i16> %dx.imad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i16> @llvm.dx.imad.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i32> @imad_int4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i32> %p0, i64 0
+ ; CHECK: extractelement <4 x i32> %p1, i64 0
+ ; CHECK: extractelement <4 x i32> %p2, i64 0
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 1
+ ; CHECK: extractelement <4 x i32> %p1, i64 1
+ ; CHECK: extractelement <4 x i32> %p2, i64 1
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 2
+ ; CHECK: extractelement <4 x i32> %p1, i64 2
+ ; CHECK: extractelement <4 x i32> %p2, i64 2
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 3
+ ; CHECK: extractelement <4 x i32> %p1, i64 3
+ ; CHECK: extractelement <4 x i32> %p2, i64 3
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 3
+ %dx.imad = call <4 x i32> @llvm.dx.imad.v4i32(<4 x i32> %p0, <4 x i32> %p1, <4 x i32> %p2)
+ ret <4 x i32> %dx.imad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i32> @llvm.dx.imad.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i64> @imad_int64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i64> %p0, i64 0
+ ; CHECK: extractelement <4 x i64> %p1, i64 0
+ ; CHECK: extractelement <4 x i64> %p2, i64 0
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 1
+ ; CHECK: extractelement <4 x i64> %p1, i64 1
+ ; CHECK: extractelement <4 x i64> %p2, i64 1
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 2
+ ; CHECK: extractelement <4 x i64> %p1, i64 2
+ ; CHECK: extractelement <4 x i64> %p2, i64 2
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 3
+ ; CHECK: extractelement <4 x i64> %p1, i64 3
+ ; CHECK: extractelement <4 x i64> %p2, i64 3
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 3
+ %dx.imad = call <4 x i64> @llvm.dx.imad.v4i64(<4 x i64> %p0, <4 x i64> %p1, <4 x i64> %p2)
+ ret <4 x i64> %dx.imad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i64> @llvm.dx.imad.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) #1
+
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
diff --git a/llvm/test/CodeGen/DirectX/umad.ll b/llvm/test/CodeGen/DirectX/umad.ll
index 104d2380af66b..76516a2a64637 100644
--- a/llvm/test/CodeGen/DirectX/umad.ll
+++ b/llvm/test/CodeGen/DirectX/umad.ll
@@ -1,17 +1,13 @@
-; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower < %s | FileCheck %s
; Make sure dxil operation function calls for round are generated for float and half.
-; CHECK:call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
-; CHECK:call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
-; CHECK:call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
-
-; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-pc-shadermodel6.7-library"
; Function Attrs: noinline nounwind optnone
define noundef i16 @umad_ushort(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 {
entry:
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
%p2.addr = alloca i16, align 2
%p1.addr = alloca i16, align 2
%p0.addr = alloca i16, align 2
@@ -31,6 +27,7 @@ declare i16 @llvm.dx.umad.i16(i16, i16, i16) #1
; Function Attrs: noinline nounwind optnone
define noundef i32 @umad_uint(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) #0 {
entry:
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
%p2.addr = alloca i32, align 4
%p1.addr = alloca i32, align 4
%p0.addr = alloca i32, align 4
@@ -50,6 +47,7 @@ declare i32 @llvm.dx.umad.i32(i32, i32, i32) #1
; Function Attrs: noinline nounwind optnone
define noundef i64 @umad_uint64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 {
entry:
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
%p2.addr = alloca i64, align 8
%p1.addr = alloca i64, align 8
%p0.addr = alloca i64, align 8
@@ -65,3 +63,95 @@ entry:
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare i64 @llvm.dx.umad.i64(i64, i64, i64) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i16> @umad_uint16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i16> %p0, i64 0
+ ; CHECK: extractelement <4 x i16> %p1, i64 0
+ ; CHECK: extractelement <4 x i16> %p2, i64 0
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 1
+ ; CHECK: extractelement <4 x i16> %p1, i64 1
+ ; CHECK: extractelement <4 x i16> %p2, i64 1
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 2
+ ; CHECK: extractelement <4 x i16> %p1, i64 2
+ ; CHECK: extractelement <4 x i16> %p2, i64 2
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 3
+ ; CHECK: extractelement <4 x i16> %p1, i64 3
+ ; CHECK: extractelement <4 x i16> %p2, i64 3
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i16> poison, i16 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 3
+ %dx.umad = call <4 x i16> @llvm.dx.umad.v4i16(<4 x i16> %p0, <4 x i16> %p1, <4 x i16> %p2)
+ ret <4 x i16> %dx.umad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i16> @llvm.dx.umad.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i32> @umad_uint4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i32> %p0, i64 0
+ ; CHECK: extractelement <4 x i32> %p1, i64 0
+ ; CHECK: extractelement <4 x i32> %p2, i64 0
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 1
+ ; CHECK: extractelement <4 x i32> %p1, i64 1
+ ; CHECK: extractelement <4 x i32> %p2, i64 1
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 2
+ ; CHECK: extractelement <4 x i32> %p1, i64 2
+ ; CHECK: extractelement <4 x i32> %p2, i64 2
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 3
+ ; CHECK: extractelement <4 x i32> %p1, i64 3
+ ; CHECK: extractelement <4 x i32> %p2, i64 3
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 3
+ %dx.umad = call <4 x i32> @llvm.dx.umad.v4i32(<4 x i32> %p0, <4 x i32> %p1, <4 x i32> %p2)
+ ret <4 x i32> %dx.umad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i32> @llvm.dx.umad.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i64> @umad_uint64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i64> %p0, i64 0
+ ; CHECK: extractelement <4 x i64> %p1, i64 0
+ ; CHECK: extractelement <4 x i64> %p2, i64 0
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 1
+ ; CHECK: extractelement <4 x i64> %p1, i64 1
+ ; CHECK: extractelement <4 x i64> %p2, i64 1
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 2
+ ; CHECK: extractelement <4 x i64> %p1, i64 2
+ ; CHECK: extractelement <4 x i64> %p2, i64 2
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 3
+ ; CHECK: extractelement <4 x i64> %p1, i64 3
+ ; CHECK: extractelement <4 x i64> %p2, i64 3
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 3
+ %dx.umad = call <4 x i64> @llvm.dx.umad.v4i64(<4 x i64> %p0, <4 x i64> %p1, <4 x i64> %p2)
+ ret <4 x i64> %dx.umad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i64> @llvm.dx.umad.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) #1
+
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/17235 Here is the relevant piece of the build log for the reference
|
Fixes #152220.
dx_imad
anddx_umad
toisTargetIntrinsicTriviallyScalarizable